import pandas
import os
import matplotlib.pyplot as plt
import numpy as np


# Example program
# - Team:    Morocco
# - Athlete: Soufiane El
# - Event:   Men's 3000m Steeplechase / Athletics Men's 3,000 metres Steeplechase

# Load the athlete CSV data.
baseDir = './2025_Problem_C_Data'
athletes = pandas.read_csv(os.path.join(baseDir, 'summerOly_athletes.csv'), encoding='utf-8', low_memory=False)

# The event is matched under either of these two spellings.
STEEPLECHASE_EVENTS = (
    "Men's 3000m Steeplechase",
    "Athletics Men's 3,000 metres Steeplechase",
)
TARGET_ATHLETE = 'Soufiane El'

# Build the event mask once; both filters below reuse it.
is_steeplechase = athletes['Event'].isin(STEEPLECHASE_EVENTS)


# Select the competitive athletes for this event: rows from 2020 onwards
# whose Medal value is anything other than 'No medal'.
Soufiane = athletes[athletes['Name'] == TARGET_ATHLETE]
Men3000BestAthletes = athletes[
    is_steeplechase &
    (athletes['Year'] >= 2020) &
    (athletes['Medal'] != 'No medal')]

print(Soufiane)
print(Men3000BestAthletes)


# Collect every steeplechase entry (any year) of the athletes selected above.
record = athletes[athletes['Name'].isin(Men3000BestAthletes['Name']) & is_steeplechase]

print(record)


# Count rows per athlete once instead of re-filtering `record` for each name.
name_counts = record['Name'].value_counts()
total_records = len(record)


def _record_share(name):
    """Return the fraction of rows in ``record`` that belong to ``name``.

    Returns 0.0 when ``record`` is empty, so an empty selection (e.g. no
    matching medallists in the data) does not raise ZeroDivisionError.
    """
    if total_records == 0:
        return 0.0
    # int(...) keeps the result a plain Python float, as in the original
    # len(...) / len(...) computation.
    return int(name_counts.get(name, 0)) / total_records


# Medal probability of Soufiane El, estimated as his share of `record` rows.
Soufiane_prob = _record_share(TARGET_ATHLETE)
print(Soufiane_prob)
# Medal probability of every selected athlete, estimated the same way.
# Iterate over unique names (first-appearance order) so an athlete with
# several medal rows is reported only once.
prob_dict = {}
for name in Men3000BestAthletes['Name'].unique():
    prob = _record_share(name)
    print(name, prob)
    prob_dict[name] = prob


